df <- read_csv("./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df <- df |> select(!c("games_started", "pf_per_g"))
colnames(df)
##  [1] "player"         "dunk_tot"       "dunk_pct"       "rim_tot"       
##  [5] "rim_pct"        "rim_asted"      "other2pt_tot"   "other2pt_pct"  
##  [9] "other2pt_asted" "3pt_tot"        "3pt_pct"        "3pt_asted"     
## [13] "games"          "mp_per_g"       "fg_per_g"       "fga_per_g"     
## [17] "fg_pct"         "fg2_per_g"      "fg2a_per_g"     "fg2_pct"       
## [21] "fg3_per_g"      "fg3a_per_g"     "fg3_pct"        "ft_per_g"      
## [25] "fta_per_g"      "ft_pct"         "orb_per_g"      "drb_per_g"     
## [29] "trb_per_g"      "ast_per_g"      "stl_per_g"      "blk_per_g"     
## [33] "tov_per_g"      "pts_per_g"

Plot 2pt attempts to 3pt attempts

From success script

path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- read_csv(path)
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drop_cols <- c('team_id', 'skip', 'mp', 'pts', 'trb', 'ast')

df_career_stats <- df_career_stats |> select(!drop_cols)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(drop_cols)
## 
##   # Now:
##   data %>% select(all_of(drop_cols))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
df_lot_picks <- df_career_stats |> filter(pick_overall < 15)

colnames(df_career_stats)
##  [1] "pick_overall" "player"       "college_name" "seasons"      "g"           
##  [6] "fg_pct"       "fg3_pct"      "ft_pct"       "mp_per_g"     "pts_per_g"   
## [11] "trb_per_g"    "ast_per_g"    "ws"           "ws_per_48"    "bpm"         
## [16] "vorp"         "year"
# plotting the distribution of points + rebounds + asts (PRA) per game
df_lot_picks <- df_lot_picks |> mutate(
  pra_per_g = pts_per_g + trb_per_g + ast_per_g,
  pick_overall = factor(pick_overall)
)

# Group by draft position, and getting the mean
draft_means <- df_lot_picks |> group_by(pick_overall) |>
  summarize(avg_mpg = mean(mp_per_g),
            avg_ppg = mean(pts_per_g),
            avg_trbpg = mean(trb_per_g),
            avg_apg = mean(ast_per_g),
            avg_prapg = mean(pra_per_g))

print(draft_means, n = 14)
## # A tibble: 14 × 6
##    pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
##    <fct>          <dbl>   <dbl>     <dbl>   <dbl>     <dbl>
##  1 1               31.2   18.8       6.52    4.32      29.7
##  2 2               26.9   13.7       4.96    2.88      21.6
##  3 3               30.0   17.6       6.3     3.52      27.4
##  4 4               27.5   12.8       5.39    2.48      20.7
##  5 5               26.0   12.7       4.92    3.55      21.2
##  6 6               23.2   10.2       4.35    2.26      16.8
##  7 7               27.6   13.4       4.88    2.77      21.0
##  8 8               21.1    8.82      3.37    1.77      14.0
##  9 9               24.5   10.9       4.55    2.52      18.0
## 10 10              23.5   10.4       3.61    2.31      16.3
## 11 11              21.1   10.2       3.69    2.16      16.0
## 12 12              24.6   10.6       4.47    2.23      17.3
## 13 13              22.6   10.9       3.99    2.23      17.1
## 14 14              20.3    8.78      3.75    1.3       13.8
df_lot_picks |> ggplot(aes(x = pick_overall, y = pra_per_g)) +
  geom_boxplot() + 
  labs(x = "Draft Pick", y = "Points-Rebounds-Assists Per Game")

Plotting the players in the top 25% in PRA per game

df2 <- left_join(df, df_lot_picks, by = c("player"), suffix = c("_college", "_nba"))
df2 <- df2 |> 
  separate_wider_delim(dunk_tot, delim = "-", names = c("dunk_made", "dunk_attempts"))

df2 <- df2 |>
  separate_wider_delim(rim_tot, delim = "-", names = c("rim_made", "rim_attempts"))

df2 <- df2 |>
  separate_wider_delim(other2pt_tot, delim = "-", names = c("other2pt_made", "other2pt_attempts"))

df2 <-df2 |>
  mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made, other2pt_attempts), as.numeric))

defining bust metric

df2 <- df2 |> mutate(
  vorp_per_g = vorp / g
)

df_top_players <- df2 |> 
  group_by(pick_overall) |>
  summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.7)))

df_bottom_players <- df2 |> 
  group_by(pick_overall) |>
  summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.3)))

# metric favors big men
# make the rebound percentile higher

is_not_bust <- function(pick_number, df_top_players = df_top_players, df2 = df2) {
  ppg <- df_top_players |> pull(pts_per_g_nba)
  apg <- df_top_players |> pull(ast_per_g_nba)
  prapg <- df_top_players |> pull(pra_per_g)
  
  df_top_rb <- df2 |> 
    group_by(pick_overall) |>
    summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.8))
  
  rpg <- df_top_rb |> pull(trb_per_g)
  
  df_top_vorp <- df2 |> 
    group_by(pick_overall) |>
    summarize(vorp_per_g = median(vorp_per_g))
  
  vorppg <- df_top_vorp |> pull(vorp_per_g)
  
  df <- df2 |> filter(pick_overall == pick_number) |>
  filter(pra_per_g >= prapg[[pick_number]] | ast_per_g_nba >= apg[[pick_number]] | trb_per_g_nba >= rpg[[pick_number]] | pts_per_g_nba >= ppg[[pick_number]]) |>
    filter(vorp_per_g >= vorppg[[pick_number]]) 
  
  # must also have played at least most of their career in the nba
  df <- df |> filter(seasons >= 4 / 5 * (2023-year))
  
  return(df)
}

is_bust <- function(pick_number, df_bottom_players = df_bottom_players, df2 = df2) {
  ppg <- df_bottom_players |> pull(pts_per_g_nba)
  
  apg <- df_bottom_players |> pull(ast_per_g_nba)
  prapg <- df_bottom_players |> pull(pra_per_g)
  
  df_bottom_rb <- df2 |> 
    group_by(pick_overall) |>
    summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.4))
  
  rpg <- df_bottom_rb |> pull(trb_per_g)
  
  df_bottom_vorp <- df2 |> 
    group_by(pick_overall) |>
    summarize(vorp_per_g = quantile(vorp_per_g, probs = 0.3))
  
  vorppg <- df_bottom_vorp |> pull(vorp_per_g)
  
  # playing less than half the seasons since drafted makes you a bust
  df <- df2 |> filter(pick_overall == pick_number) |>
  filter((pra_per_g < prapg[[pick_number]] & ast_per_g_nba < apg[[pick_number]] & trb_per_g_nba < rpg[[pick_number]] & pts_per_g_nba < ppg[[pick_number]] & vorp_per_g < vorppg[[pick_number]]) | seasons < 1 / 2 * (2023-year))
  
  
  return(df)
}
# first overall picks (not bust)
df_pick_1 <- is_not_bust(1, df_top_players, df2)

# first overall picks (bust)
df_pick_1_bust <- is_bust(1, df_bottom_players, df2)

df_pick_1
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 John Wall              33            36 91.7%         116          182 63.7%  
## 2 Kyrie Irving            0             0 0%             26           39 66.7%  
## 3 Anthony Davis          96            98 98.0%         152          174 87.4%  
## 4 Karl-Anthony T…        22            24 91.7%          87          121 71.9%  
## 5 Ben Simmons            56            61 91.8%         159          220 72.3%  
## 6 Zion Williamson        72            79 91.1%         247          313 78.9%  
## 7 Anthony Edwards        27            27 100.0%         89          129 69.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_1_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Anthony Bennett        53            58 91.4%         100          140 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# second overall
df_pick_2 <- is_not_bust(2, df_top_players, df2)
df_pick_2_bust <- is_bust(2, df_bottom_players, df2)

df_pick_2
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 D'Angelo Russe…         4             4 100.0%         70          110 63.6%  
## 2 Brandon Ingram         17            17 100.0%         69          117 59.0%  
## 3 Lonzo Ball             37            40 92.5%          94          120 78.3%  
## 4 Ja Morant              28            31 90.3%         160          264 60.6%  
## 5 Chet Holmgren          57            57 100.0%        105          125 84.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_2_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Derrick Willia…        56            60 93.3%         135          188 71.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# third overall
df_pick_3 <- is_not_bust(3, df_top_players, df2)
df_pick_3_bust <- is_bust(3, df_bottom_players, df2)

df_pick_3
## # A tibble: 4 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Bradley Beal        18            20 90.0%          89          137 65.0%  
## 2 Joel Embiid         30            30 100.0%         80           99 80.8%  
## 3 Jayson Tatum        18            21 85.7%          79          126 62.7%  
## 4 Evan Mobley         63            66 95.5%         113          144 78.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_3_bust
## # A tibble: 1 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Jahlil Okafor        64            67 95.5%         213          270 78.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourth overall
df_pick_4 <- is_not_bust(4, df_top_players, df2)
df_pick_4_bust <- is_bust(4, df_bottom_players, df2)

df_pick_4
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Aaron Gordon           54            56 96.4%         137          198 69.2%  
## 2 Jaren Jackson …        31            31 100.0%         61           93 65.6%  
## 3 Scottie Barnes         19            21 90.5%          61           89 68.5%  
## 4 Keegan Murray          63            67 94.0%         196          277 70.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# fifth overall
df_pick_5 <- is_not_bust(5, df_top_players, df2)
df_pick_5_bust <- is_bust(5, df_bottom_players, df2)

df_pick_5
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 DeMarcus Cousi…        53            57 93.0%         144          189 76.2%  
## 2 De'Aaron Fox           20            21 95.2%         131          203 64.5%  
## 3 Trae Young              0             0 0%            105          201 52.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_5_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Thomas Robinson        70            83 84.3%         169          262 64.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# sixth overall
df_pick_6 <- is_not_bust(6, df_top_players, df2)
df_pick_6_bust <- is_bust(6, df_bottom_players, df2)

df_pick_6
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Damian Lillard        13            17 76.5%          98          169 58.0%  
## 2 Nerlens Noel          48            50 96.0%          76           99 76.8%  
## 3 Marcus Smart          16            18 88.9%          78          110 70.9%  
## 4 Buddy Hield           18            22 81.8%         119          178 66.9%  
## 5 Onyeka Okongwu        58            61 95.1%         135          186 72.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_6_bust
## # A tibble: 1 × 55
##   player    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>         <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ekpe Udoh        30            32 93.8%          78          109 71.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# seventh overall
df_pick_7 <- is_not_bust(7, df_top_players, df2)
df_pick_7_bust <- is_bust(7, df_bottom_players, df2)

df_pick_7
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Julius Randle          37            40 92.5%         132          197 67.0%  
## 2 Jamal Murray           18            19 94.7%          77          111 69.4%  
## 3 Lauri Markkanen        20            24 83.3%          65          100 65.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_7_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ben McLemore        44            45 97.8%          90          126 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eighth overall
df_pick_8 <- is_not_bust(8, df_top_players, df2)
df_pick_8_bust <- is_bust(8, df_bottom_players, df2)

df_pick_8
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Al-Farouq Aminu        46            48 95.8%         112          173 64.7%  
## 2 Kentavious Cal…        15            16 93.8%          63           94 67.0%  
## 3 Franz Wagner           11            11 100.0%         63           93 67.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# ninth overall
df_pick_9 <- is_not_bust(9, df_top_players, df2)
df_pick_9_bust <- is_bust(9, df_bottom_players, df2)

df_pick_9
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Gordon Hayward        19            20 95.0%          89          128 69.5%  
## 2 Kemba Walker           3             3 100.0%        115          196 58.7%  
## 3 Andre Drummond        80            89 89.9%         130          185 70.3%  
## 4 Trey Burke             9             9 100.0%         67          105 63.8%  
## 5 Jakob Poeltl          32            34 94.1%         199          284 70.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_9_bust
## # A tibble: 1 × 55
##   player     dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>          <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kevin Knox        18            20 90.0%          65           99 65.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# tenth overall
df_pick_10 <- is_not_bust(10, df_top_players, df2)
df_pick_10_bust <- is_bust(10, df_bottom_players, df2)

df_pick_10
## # A tibble: 5 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Paul George          18            22 81.8%          70          106 66.0%  
## 2 CJ McCollum           3             3 100.0%         34           63 54.0%  
## 3 Elfrid Payton        21            24 87.5%         169          247 68.4%  
## 4 Mikal Bridges        35            42 83.3%         109          161 67.7%  
## 5 Jalen Smith          49            52 94.2%         114          158 72.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_10_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ziaire Williams        10            11 90.9%          26           49 53.1%  
## 2 Johnny Davis           16            19 84.2%          89          143 62.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eleventh overall
df_pick_11 <- is_not_bust(11, df_top_players, df2)
df_pick_11_bust <- is_bust(11, df_bottom_players, df2)

df_pick_11
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Klay Thompson           8             8 100.0%         66          110 60.0%  
## 2 Myles Turner           11            13 84.6%          40           54 74.1%  
## 3 Domantas Sabon…        22            24 91.7%         157          214 73.4%  
## 4 Shai Gilgeous-…        11            11 100.0%        108          182 59.3%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_11_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 James Bouknight        12            12 100.0%         52           79 65.8%  
## 2 Jett Howard             6             6 100.0%         29           47 61.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# twelveth overall
df_pick_12 <- is_not_bust(12, df_top_players, df2)
df_pick_12_bust <- is_bust(12, df_bottom_players, df2)

df_pick_12
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Steven Adams           29            33 87.9%          85          129 65.9%  
## 2 Miles Bridges          30            35 85.7%          84          128 65.6%  
## 3 Tyrese Halibur…         7             8 87.5%          46           62 74.2%  
## 4 Jalen Williams         25            27 92.6%         124          186 66.7%  
## 5 Dereck Lively …        54            55 98.2%          74           96 77.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_12_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Xavier Henry        17            17 100.0%         60           90 66.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# thirteenth overall
df_pick_13 <- is_not_bust(13, df_top_players, df2)
df_pick_13_bust <- is_bust(13, df_bottom_players, df2)

df_pick_13
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ed Davis               26            27 96.3%          42           50 84.0%  
## 2 Kelly Olynyk           25            28 89.3%         152          212 71.7%  
## 3 Zach LaVine            21            25 84.0%          51           90 56.7%  
## 4 Devin Booker            8             9 88.9%          42           59 71.2%  
## 5 Donovan Mitche…         9            13 69.2%          64          116 55.2%  
## 6 Tyler Herro             4             5 80.0%          56           84 66.7%  
## 7 Jalen Duren            70            76 92.1%         111          152 73.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_13_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kendall Marsha…         0             0 0%             35           53 66.0%  
## 2 Jerome Robinson        12            13 92.3%          98          157 62.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourteenth overall
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)

df_pick_14
## # A tibble: 4 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Marcus Morris        31            33 93.9%         114          147 77.6%  
## 2 T.J. Warren          37            37 100.0%        192          251 76.5%  
## 3 Cameron Payne         3             3 100.0%         53           87 60.9%  
## 4 Bam Adebayo          99           105 94.3%         138          185 74.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_14_bust
## # A tibble: 1 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Romeo Langford        10            14 71.4%          91          138 65.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# combining dataframes

df_good <- bind_rows(df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5, df_pick_6, df_pick_7, df_pick_8, df_pick_9,
              df_pick_10, df_pick_11, df_pick_12, df_pick_13, df_pick_14)

df_busts <- bind_rows(df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust, df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust, df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust, df_pick_13_bust, df_pick_14_bust)

print(df_good, n = 20)
## # A tibble: 64 × 55
##    player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
##  1 John Wall             33            36 91.7%         116          182 63.7%  
##  2 Kyrie Irving           0             0 0%             26           39 66.7%  
##  3 Anthony Davis         96            98 98.0%         152          174 87.4%  
##  4 Karl-Anthony …        22            24 91.7%          87          121 71.9%  
##  5 Ben Simmons           56            61 91.8%         159          220 72.3%  
##  6 Zion Williams…        72            79 91.1%         247          313 78.9%  
##  7 Anthony Edwar…        27            27 100.0%         89          129 69.0%  
##  8 D'Angelo Russ…         4             4 100.0%         70          110 63.6%  
##  9 Brandon Ingram        17            17 100.0%         69          117 59.0%  
## 10 Lonzo Ball            37            40 92.5%          94          120 78.3%  
## 11 Ja Morant             28            31 90.3%         160          264 60.6%  
## 12 Chet Holmgren         57            57 100.0%        105          125 84.0%  
## 13 Bradley Beal          18            20 90.0%          89          137 65.0%  
## 14 Joel Embiid           30            30 100.0%         80           99 80.8%  
## 15 Jayson Tatum          18            21 85.7%          79          126 62.7%  
## 16 Evan Mobley           63            66 95.5%         113          144 78.5%  
## 17 Aaron Gordon          54            56 96.4%         137          198 69.2%  
## 18 Jaren Jackson…        31            31 100.0%         61           93 65.6%  
## 19 Scottie Barnes        19            21 90.5%          61           89 68.5%  
## 20 Keegan Murray         63            67 94.0%         196          277 70.8%  
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
df_good |> pull(player)
##  [1] "John Wall"                "Kyrie Irving"            
##  [3] "Anthony Davis"            "Karl-Anthony Towns"      
##  [5] "Ben Simmons"              "Zion Williamson"         
##  [7] "Anthony Edwards"          "D'Angelo Russell"        
##  [9] "Brandon Ingram"           "Lonzo Ball"              
## [11] "Ja Morant"                "Chet Holmgren"           
## [13] "Bradley Beal"             "Joel Embiid"             
## [15] "Jayson Tatum"             "Evan Mobley"             
## [17] "Aaron Gordon"             "Jaren Jackson Jr."       
## [19] "Scottie Barnes"           "Keegan Murray"           
## [21] "DeMarcus Cousins"         "De'Aaron Fox"            
## [23] "Trae Young"               "Damian Lillard"          
## [25] "Nerlens Noel"             "Marcus Smart"            
## [27] "Buddy Hield"              "Onyeka Okongwu"          
## [29] "Julius Randle"            "Jamal Murray"            
## [31] "Lauri Markkanen"          "Al-Farouq Aminu"         
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"            
## [35] "Gordon Hayward"           "Kemba Walker"            
## [37] "Andre Drummond"           "Trey Burke"              
## [39] "Jakob Poeltl"             "Paul George"             
## [41] "CJ McCollum"              "Elfrid Payton"           
## [43] "Mikal Bridges"            "Jalen Smith"             
## [45] "Klay Thompson"            "Myles Turner"            
## [47] "Domantas Sabonis"         "Shai Gilgeous-Alexander" 
## [49] "Steven Adams"             "Miles Bridges"           
## [51] "Tyrese Haliburton"        "Jalen Williams"          
## [53] "Dereck Lively II"         "Ed Davis"                
## [55] "Kelly Olynyk"             "Zach LaVine"             
## [57] "Devin Booker"             "Donovan Mitchell"        
## [59] "Tyler Herro"              "Jalen Duren"             
## [61] "Marcus Morris"            "T.J. Warren"             
## [63] "Cameron Payne"            "Bam Adebayo"
good_list <- df_good |> pull(player)


df_busts |> pull(player)
##  [1] "Anthony Bennett"  "Derrick Williams" "Jahlil Okafor"    "Thomas Robinson" 
##  [5] "Ekpe Udoh"        "Ben McLemore"     "Kevin Knox"       "Ziaire Williams" 
##  [9] "Johnny Davis"     "James Bouknight"  "Jett Howard"      "Xavier Henry"    
## [13] "Kendall Marshall" "Jerome Robinson"  "Romeo Langford"
bust_list <- df_busts |> pull(player)
plot_good <- ggplot(df_good, aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(color = "green", size = 4, alpha = 0.5) +
  geom_label_repel(data = df_good, aes(label = player), size = 1.5, max.overlaps = 20) +
  labs(title = "CBB Shot Selection for Good Value NBA Lottery Picks", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
  theme_bw()

plot_busts <- ggplot(df_busts, aes(x = fg2_per_g, y = fg3_per_g, label = player)) +
  geom_point(color = "red", size = 4, alpha = 0.5) +
  geom_label_repel(size = 3) +
  labs(title = "CBB Shot Selection for NBA Lottery Busts", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
  theme_bw()


plot_combined <- ggplot(df2, aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(aes(color = "Average value"), size = 4, alpha = 0.2) +
  geom_point(data = df_busts, aes(color = "Bad value"), size = 4, alpha = 0.5) +
  geom_point(data = df_good, aes(color = "Good value"), size = 4, alpha = 0.5) +
  labs(title = "CBB Shot Selection for NBA Lottery Picks", 
       x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game", color = "Value") +
  scale_color_manual(values = c("Bad value" = "red", "Good value" = "green", "Average value" = "grey")) +
  theme_bw()

plot_combined

plot_busts

plot_good

Principal Component Analysis of college stats, for grouping/covariance

library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Preparing Data

colnames(df2)
##  [1] "player"            "dunk_made"         "dunk_attempts"    
##  [4] "dunk_pct"          "rim_made"          "rim_attempts"     
##  [7] "rim_pct"           "rim_asted"         "other2pt_made"    
## [10] "other2pt_attempts" "other2pt_pct"      "other2pt_asted"   
## [13] "3pt_tot"           "3pt_pct"           "3pt_asted"        
## [16] "games"             "mp_per_g_college"  "fg_per_g"         
## [19] "fga_per_g"         "fg_pct_college"    "fg2_per_g"        
## [22] "fg2a_per_g"        "fg2_pct"           "fg3_per_g"        
## [25] "fg3a_per_g"        "fg3_pct_college"   "ft_per_g"         
## [28] "fta_per_g"         "ft_pct_college"    "orb_per_g"        
## [31] "drb_per_g"         "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [37] "pts_per_g_college" "pick_overall"      "college_name"     
## [40] "seasons"           "g"                 "fg_pct_nba"       
## [43] "fg3_pct_nba"       "ft_pct_nba"        "mp_per_g_nba"     
## [46] "pts_per_g_nba"     "trb_per_g_nba"     "ast_per_g_nba"    
## [49] "ws"                "ws_per_48"         "bpm"              
## [52] "vorp"              "year"              "pra_per_g"        
## [55] "vorp_per_g"
df3 <- column_to_rownames(df2, var = "player")

df_cbb <- df3 |> select(
  dunk_made, dunk_attempts, dunk_pct, rim_made, rim_attempts, rim_pct, rim_asted,
  other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted, fg2_pct, fg3_per_g, fg3a_per_g, 
 `3pt_asted`, games, ft_per_g, fta_per_g, ast_per_g_college, orb_per_g, drb_per_g,
  stl_per_g, blk_per_g, tov_per_g, pts_per_g_college
) |> rename(fg3_asted = `3pt_asted`) |> 
  mutate(dunk_pct = parse_number(dunk_pct) / 100, # changing to percentages
         rim_pct = parse_number(rim_pct) / 100,
         rim_asted = parse_number(rim_asted) / 100,
         other2pt_pct = parse_number(other2pt_pct) / 100,
         other2pt_asted = parse_number(other2pt_asted) / 100,
         fg3_asted = parse_number(fg3_asted) / 100,
         fg3_pct_per_g = fg3_per_g / fg3a_per_g)

df_cbb <- df_cbb |> 
  mutate(fg3_pct_per_g = coalesce(fg3_pct_per_g, 0)) |>
  relocate(fg3_pct_per_g, .after = fg3_asted)

to_per_game <- function(x, games) {
  x <- x / games
  return(x)
}

df_cbb <- df_cbb |> # making everything per game
  mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made, 
                  other2pt_attempts), function(x) to_per_game(x, games)))

colSums(is.na(df_cbb))
##         dunk_made     dunk_attempts          dunk_pct          rim_made 
##                 0                 0                 0                 0 
##      rim_attempts           rim_pct         rim_asted     other2pt_made 
##                 0                 0                 0                 0 
## other2pt_attempts      other2pt_pct    other2pt_asted           fg2_pct 
##                 0                 0                 0                 0 
##         fg3_per_g        fg3a_per_g         fg3_asted     fg3_pct_per_g 
##                 0                 0                 0                 0 
##             games          ft_per_g         fta_per_g ast_per_g_college 
##                 0                 0                 0                 0 
##         orb_per_g         drb_per_g         stl_per_g         blk_per_g 
##                 0                 0                 0                 0 
##         tov_per_g pts_per_g_college 
##                 0                 0

Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r

df_cbb_scaled <- as_tibble(scale(df_cbb |> select(-games)))
df_cbb_scaled
## # A tibble: 165 × 25
##    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
##        <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>     <dbl>
##  1     0.338        0.359    0.125    0.886        1.15    -0.582   -0.936 
##  2    -0.789       -0.813    0.332   -0.925       -1.11     1.48    -1.47  
##  3     1.25         1.23     0.325    1.46         1.11     1.46     0.662 
##  4     0.599        0.655    0.0208   0.478        0.382    0.591    0.733 
##  5     1.13         1.15     0.215    1.44         1.18     1.16     0.633 
##  6     0.246        0.233    0.270    0.0605      -0.0262   0.521   -0.0688
##  7    -0.560       -0.589    0.387    0.233        0.367   -0.443    0.0432
##  8     0.102        0.0619   0.408   -0.246       -0.174   -0.443    0.0668
##  9    -0.635       -0.661    0.353   -0.687       -0.754    0.228    0.615 
## 10    -0.619       -0.573   -0.560   -0.839       -0.861   -0.261   -0.623 
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## #   fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## #   stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)

corr_matrix <- cor(df_cbb_scaled)
colnames(corr_matrix)
##  [1] "dunk_made"         "dunk_attempts"     "dunk_pct"         
##  [4] "rim_made"          "rim_attempts"      "rim_pct"          
##  [7] "rim_asted"         "other2pt_made"     "other2pt_attempts"
## [10] "other2pt_pct"      "other2pt_asted"    "fg2_pct"          
## [13] "fg3_per_g"         "fg3a_per_g"        "fg3_asted"        
## [16] "fg3_pct_per_g"     "ft_per_g"          "fta_per_g"        
## [19] "ast_per_g_college" "orb_per_g"         "drb_per_g"        
## [22] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [25] "pts_per_g_college"
ggcorrplot(corr_matrix, method = "square")

ggcorrplot(corr_matrix, method = "square", hc.order = TRUE,
           type = "lower", tl.cex = 7, title = "Correlations between different college statistics")

Guides: http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/

Kmeans Clustering: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f

df_cbb.sample <- df_cbb |> sample_frac(1, replace = FALSE)
head(df_cbb.sample)
##                 dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## Mo Bamba        2.1666667     2.2666667    0.956 3.633333     4.766667   0.762
## Jaxson Hayes    2.3125000     2.3437500    0.987 3.281250     3.843750   0.854
## Harrison Barnes 0.4533333     0.4533333    1.000 0.880000     1.200000   0.733
## Thomas Robinson 0.6666667     0.7904762    0.843 1.609524     2.495238   0.645
## Brandon Miller  0.6216216     0.7027027    0.885 2.297297     3.972973   0.578
## Anthony Bennett 1.5142857     1.6571429    0.914 2.857143     4.000000   0.714
##                 rim_asted other2pt_made other2pt_attempts other2pt_pct
## Mo Bamba            0.532     0.7666667          2.500000        0.307
## Jaxson Hayes        0.829     0.5625000          1.437500        0.391
## Harrison Barnes     0.455     1.5066667          3.906667        0.386
## Thomas Robinson     0.627     0.7619048          2.180952        0.349
## Brandon Miller      0.259     0.8378378          2.513514        0.333
## Anthony Bennett     0.620     1.6000000          3.685714        0.434
##                 other2pt_asted fg2_pct fg3_per_g fg3a_per_g fg3_asted
## Mo Bamba                 0.217   0.603       0.5        1.7     0.857
## Jaxson Hayes             0.333   0.728       0.0        0.0     0.000
## Harrison Barnes          0.204   0.469       1.5        4.4     0.837
## Thomas Robinson          0.500   0.525       0.1        0.1     0.857
## Brandon Miller           0.032   0.483       2.9        7.5     0.830
## Anthony Bennett          0.768   0.587       1.0        2.7     0.972
##                 fg3_pct_per_g games ft_per_g fta_per_g ast_per_g_college
## Mo Bamba            0.2941176    30      2.7       4.0               0.5
## Jaxson Hayes        0.0000000    32      2.3       3.1               0.3
## Harrison Barnes     0.3409091    75      3.1       4.3               1.3
## Thomas Robinson     1.0000000   105      2.2       3.6               1.0
## Brandon Miller      0.3866667    37      3.9       4.6               2.1
## Anthony Bennett     0.3703704    35      3.5       5.1               1.0
##                 orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## Mo Bamba              3.2       7.3       0.8       3.7       1.5
## Jaxson Hayes          1.8       3.3       0.6       2.2       0.9
## Harrison Barnes       2.0       3.5       0.9       0.4       1.9
## Thomas Robinson       2.1       5.2       0.6       0.7       1.6
## Brandon Miller        2.1       6.2       0.9       0.9       2.2
## Anthony Bennett       2.5       5.7       0.7       1.2       1.9
##                 pts_per_g_college
## Mo Bamba                     12.9
## Jaxson Hayes                 10.0
## Harrison Barnes              16.3
## Thomas Robinson               9.8
## Brandon Miller               18.8
## Anthony Bennett              16.1
df_cbb.pca <- PCA(df_cbb.sample |> select(-games), scale.unit = TRUE, graph = FALSE)

fviz_eig(df_cbb.pca, addlabels = TRUE, main = "Statistics Represented in Lower Dimensional Components")

fviz_pca_var(df_cbb.pca, col.var = "cos2", gradient.cols = c("lightblue", "black"), repel = TRUE)

var <- get_pca_var(df_cbb.pca)

fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2) + 
  labs(title = "Quality of Representation to PCA Dimensions 1 and 2")

fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1) +
  labs(title = "NBA Lottery Picks on PCA Dimensions 1 and 2")

summary(df_cbb.pca)
## 
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               7.916   5.341   2.094   1.495   1.261   1.201   1.002
## % of var.             31.663  21.364   8.376   5.978   5.045   4.803   4.008
## Cumulative % of var.  31.663  53.027  61.403  67.381  72.426  77.229  81.237
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.967   0.751   0.556   0.446   0.420   0.327   0.267
## % of var.              3.867   3.006   2.225   1.786   1.679   1.310   1.067
## Cumulative % of var.  85.104  88.110  90.335  92.121  93.800  95.110  96.177
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.258   0.213   0.164   0.141   0.072   0.062   0.024
## % of var.              1.032   0.850   0.655   0.563   0.286   0.249   0.097
## Cumulative % of var.  97.209  98.059  98.714  99.277  99.564  99.813  99.910
##                       Dim.22  Dim.23  Dim.24  Dim.25
## Variance               0.011   0.006   0.004   0.002
## % of var.              0.046   0.023   0.014   0.007
## Cumulative % of var.  99.955  99.979  99.993 100.000
## 
## Individuals (the 10 first)
##                       Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## Mo Bamba          |  6.240 |  5.048  1.951  0.655 |  0.723  0.059  0.013 |
## Jaxson Hayes      |  7.789 |  6.143  2.890  0.622 | -1.826  0.378  0.055 |
## Harrison Barnes   |  3.113 | -1.045  0.084  0.113 | -0.397  0.018  0.016 |
## Thomas Robinson   |  5.940 |  0.643  0.032  0.012 | -2.155  0.527  0.132 |
## Brandon Miller    |  4.490 | -2.143  0.352  0.228 |  1.766  0.354  0.155 |
## Anthony Bennett   |  4.389 |  3.010  0.694  0.470 |  0.997  0.113  0.052 |
## Steven Adams      |  6.140 |  3.716  1.057  0.366 | -2.840  0.915  0.214 |
## Nerlens Noel      |  7.353 |  5.366  2.205  0.533 |  0.882  0.088  0.014 |
## Cameron Johnson   |  3.881 | -1.383  0.146  0.127 | -2.260  0.579  0.339 |
## Dennis Smith Jr.  |  5.451 | -2.658  0.541  0.238 |  3.803  1.641  0.487 |
##                    Dim.3    ctr   cos2  
## Mo Bamba          -0.148  0.006  0.001 |
## Jaxson Hayes      -2.273  1.495  0.085 |
## Harrison Barnes    1.752  0.888  0.317 |
## Thomas Robinson    1.695  0.831  0.081 |
## Brandon Miller     0.902  0.235  0.040 |
## Anthony Bennett    2.370  1.626  0.292 |
## Steven Adams      -2.476  1.774  0.163 |
## Nerlens Noel      -3.100  2.781  0.178 |
## Cameron Johnson    1.558  0.702  0.161 |
## Dennis Smith Jr.  -1.993  1.150  0.134 |
## 
## Variables (the 10 first)
##                      Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## dunk_made         |  0.835  8.802  0.697 |  0.336  2.117  0.113 | -0.025  0.029
## dunk_attempts     |  0.829  8.675  0.687 |  0.338  2.140  0.114 | -0.025  0.030
## dunk_pct          |  0.334  1.412  0.112 | -0.120  0.269  0.014 |  0.228  2.485
## rim_made          |  0.584  4.305  0.341 |  0.672  8.468  0.452 | -0.088  0.370
## rim_attempts      |  0.451  2.571  0.203 |  0.740 10.243  0.547 | -0.093  0.410
## rim_pct           |  0.724  6.625  0.524 | -0.148  0.409  0.022 | -0.001  0.000
## rim_asted         |  0.767  7.441  0.589 | -0.354  2.351  0.126 |  0.182  1.578
## other2pt_made     |  0.106  0.141  0.011 |  0.601  6.772  0.362 |  0.560 14.964
## other2pt_attempts |  0.102  0.131  0.010 |  0.651  7.924  0.423 |  0.484 11.203
## other2pt_pct      |  0.007  0.001  0.000 | -0.045  0.039  0.002 |  0.359  6.140
##                     cos2  
## dunk_made          0.001 |
## dunk_attempts      0.001 |
## dunk_pct           0.052 |
## rim_made           0.008 |
## rim_attempts       0.009 |
## rim_pct            0.000 |
## rim_asted          0.033 |
## other2pt_made      0.313 |
## other2pt_attempts  0.235 |
## other2pt_pct       0.129 |
pca2 <- prcomp(df_cbb.sample, center = TRUE, scale = TRUE)
summary(pca2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.8233 2.3577 1.45399 1.29215 1.18882 1.10678 1.00292
## Proportion of Variance 0.3066 0.2138 0.08131 0.06422 0.05436 0.04711 0.03869
## Cumulative Proportion  0.3066 0.5204 0.60167 0.66589 0.72025 0.76736 0.80605
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.98328 0.87000 0.75739 0.66985 0.65034 0.64385 0.56902
## Proportion of Variance 0.03719 0.02911 0.02206 0.01726 0.01627 0.01594 0.01245
## Cumulative Proportion  0.84323 0.87234 0.89441 0.91166 0.92793 0.94388 0.95633
##                           PC15    PC16    PC17    PC18    PC19    PC20   PC21
## Standard deviation     0.51470 0.49095 0.41133 0.40183 0.37408 0.26745 0.2038
## Proportion of Variance 0.01019 0.00927 0.00651 0.00621 0.00538 0.00275 0.0016
## Cumulative Proportion  0.96652 0.97579 0.98230 0.98851 0.99389 0.99664 0.9982
##                          PC22    PC23    PC24    PC25    PC26
## Standard deviation     0.1527 0.10681 0.07652 0.05949 0.04172
## Proportion of Variance 0.0009 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion  0.9991 0.99957 0.99980 0.99993 1.00000
df_cluster <- as.data.frame(-pca2$x[, 1:2])
df_cluster
##                                  PC1           PC2
## Mo Bamba                  5.12168936 -0.6472841174
## Jaxson Hayes              6.10602196  1.9333235932
## Harrison Barnes          -1.11071750  0.4675508975
## Thomas Robinson           0.41276682  2.4752367530
## Brandon Miller           -2.00076132 -1.9889065138
## Anthony Bennett           3.08683149 -0.9658417837
## Steven Adams              3.66839570  2.7680962536
## Nerlens Noel              5.45383914 -0.7991793564
## Cameron Johnson          -1.47106716  2.1704255345
## Dennis Smith Jr.         -2.43263222 -4.0080211081
## Wes Johnson               1.50316126 -1.3047269862
## Justise Winslow          -0.36808543 -0.0140660045
## Jordan Hawkins           -3.13721836  2.6918367917
## Alex Len                  3.60602390  2.2253867363
## Malik Monk               -2.28778612 -1.6797126865
## Nik Stauskas             -3.13687982  1.8802991525
## Derrick Favors            5.01881131 -0.5372658952
## Tyler Herro              -1.81846173  0.4356215772
## CJ McCollum              -3.90112719 -0.5500261983
## Devin Booker             -1.28579904  3.5063787159
## Evan Turner              -1.49673522 -0.4578015058
## Julius Randle             2.20418418 -3.0357101236
## Ben Simmons               3.55778734 -6.3959006442
## Franz Wagner             -1.24385093  2.2215940450
## Kevin Knox               -0.63604976 -0.7501807107
## Aaron Gordon              2.71643469 -0.5168844660
## Jonathan Isaac            1.48927680  0.4512240082
## Trey Burke               -3.60982350 -0.5620071724
## Trae Young               -6.66738911 -7.9875515113
## Jalen Suggs              -1.57680464 -1.3826079667
## Victor Oladipo           -0.63903222  2.5888631078
## Jarace Walker             1.30148259  1.2591517315
## Joel Embiid               4.49751364  0.3052975967
## James Bouknight          -1.52674532  0.8368198759
## Bennedict Mathurin       -1.07663046  1.0805356809
## Josh Jackson              1.48957168 -3.0475464620
## Lauri Markkanen          -0.18748839  0.1368727294
## Jabari Parker             2.40112005 -3.8466488862
## Noah Vonleh               1.11130925 -0.1751834144
## Miles Bridges            -0.51099544  0.8957909493
## Zach Collins              2.23468578  2.2257336106
## Deandre Ayton             6.32878662 -3.9424880892
## Frank Kaminsky           -0.20730366  4.1181677131
## Jabari Smith Jr.         -1.62886867 -1.4477810399
## Terrence Ross            -1.18817176  2.5860223513
## Otto Porter Jr.           0.04885497  1.8356037020
## Cody Zeller               2.02496946 -0.5680230150
## De'Aaron Fox             -0.87066774 -3.3180112387
## Jaren Jackson Jr.         1.49089242  1.7016335898
## D'Angelo Russell         -2.98270993 -2.6314533501
## Anthony Edwards          -1.40251473 -2.6178813170
## Chris Duarte             -1.92568686  1.3515954340
## DeMarcus Cousins          4.42466023 -2.5278733794
## P.J. Washington           0.33949853  1.0835362435
## Davion Mitchell          -3.51406172  1.6468468321
## Bradley Beal             -0.88415187 -0.6225122311
## Elfrid Payton            -2.00506164 -0.8904543832
## Derrick Williams          1.03830203 -1.0304451116
## Wendell Carter Jr.        3.36498747 -0.7216499359
## Willie Cauley-Stein       2.65175786  4.0213075524
## Kendall Marshall         -4.60320232  1.8524037126
## Patrick Patterson         2.53963602  2.1109010563
## Kentavious Caldwell-Pope -2.25315004  0.8810443210
## Michael Carter-Williams  -2.91272861  0.8684079705
## Jamal Murray             -2.37633667 -1.7128111826
## Shai Gilgeous-Alexander  -2.04551424 -2.3234644917
## Gordon Hayward           -0.68966882  0.8690154013
## Marvin Bagley III         5.65427831 -4.5448753922
## Dion Waiters             -1.92316128  2.9143303356
## Kris Dunn                -2.69748838  0.0951825819
## Collin Sexton            -2.52183153 -3.9991906129
## Ben McLemore              0.24239908 -0.1870327074
## Doug McDermott           -1.01124840  1.4023518631
## Myles Turner              1.86401362  1.8030642874
## Paul George              -2.10747433  0.1876406410
## Austin Rivers            -2.95329868 -1.3791937733
## Devin Vassell            -0.72773722  4.1046446591
## Anthony Black            -1.20227004 -1.7148572319
## Damian Lillard           -4.29694802 -0.3843340708
## Cole Aldrich              2.78307370  3.9300845772
## Kyrie Irving             -3.77122159 -3.0033308087
## Xavier Henry             -1.24518982  0.8915202286
## Cameron Payne            -4.48285978 -1.3236954751
## Dereck Lively II          5.06532407  4.9338352381
## Taurean Prince           -1.44792106  3.7467662602
## Kira Lewis Jr.           -3.26278428 -0.0954317440
## Anthony Davis             6.73630616 -1.2629548886
## Jeremy Lamb              -0.50899046  2.6527408299
## Denzel Valentine         -3.08887719  3.2154062834
## Michael Kidd-Gilchrist    1.49000302 -0.5168492854
## Meyers Leonard            2.74024056  3.6508169349
## De'Andre Hunter          -0.77514281  2.0904369415
## Donovan Mitchell         -3.15753551  1.8102604526
## Taylor Hendricks          1.15461505  0.2239571117
## Jaden Ivey               -1.86477689  0.0773337674
## Buddy Hield              -3.54777620  1.4535258538
## Bam Adebayo               5.99686501 -1.1625539440
## Greg Monroe               0.55772278 -0.6192425231
## Jalen Williams           -1.80913049  1.6958213307
## Onyeka Okongwu            5.79890613 -2.6047417619
## Stanley Johnson          -1.16931864 -1.1296582673
## Jakob Poeltl              3.07463033  1.0055357287
## Al-Farouq Aminu           1.02295866 -0.6557701582
## Gradey Dick              -1.44740283  1.2927758040
## Ochai Agbaji             -1.65972813  3.1665060860
## Jalen Duren               5.98165537 -0.5776256581
## Andre Drummond            5.95363392  0.5511351054
## Jett Howard              -2.91653599  1.5725624278
## Tyrese Haliburton        -2.28374305  2.7467761148
## Chet Holmgren             4.15750678  0.0605690899
## Ed Davis                  3.31971345  3.1931177330
## Kelly Olynyk              0.73492325  3.8811181762
## Alec Burks               -0.74320461 -2.0264013166
## Mikal Bridges            -1.20162590  3.5903451628
## Andrew Wiggins            0.36542307 -2.6025198229
## Markieff Morris           1.29699896  4.0373945397
## RJ Barrett               -0.08315247 -5.0528618008
## Johnny Davis             -1.33669032  0.1478916084
## Obi Toppin                3.01791562  0.9098400603
## Tristan Thompson          4.60556911 -2.1556621458
## Patrick Williams         -0.12807727  1.3341060184
## John Henson               2.97155356  3.1904829161
## Keegan Murray             1.59473589  0.7127863270
## Jalen Smith               2.10897015  1.8201712718
## Romeo Langford           -0.73466257 -2.2444498661
## Karl-Anthony Towns        2.08103954  1.1196065654
## Trey Lyles                2.26213434  2.3189123450
## Kemba Walker             -3.18777727 -0.5896508288
## Luke Kennard             -2.76829877  0.9772815337
## Shabazz Muhammad          0.36507564 -2.1338042145
## Marcus Morris             0.73895771  2.6323130400
## Zion Williamson           5.07447989 -4.6504795504
## Cason Wallace            -2.34104930  0.2152815400
## Jahlil Okafor             5.47754196 -2.7772955807
## Moses Moody              -1.46960252 -1.4190061757
## Joshua Primo             -1.55551858  3.7707023835
## Klay Thompson            -3.84131304  0.2800845197
## Domantas Sabonis          1.67699707  1.2190984214
## Coby White               -3.35981962 -0.9338304491
## Jimmer Fredette          -4.12885438  0.1532925420
## John Wall                -1.73494610 -3.9455798842
## Aaron Nesmith            -3.28346405  1.6855751483
## Lonzo Ball               -0.30972840  0.0007053551
## Jeremy Sochan             0.86229364  1.9652843807
## Brandon Knight           -3.54151495 -2.0093567331
## Scottie Barnes           -0.41005879  0.0002825293
## Ja Morant                -2.49001353 -3.1186017067
## Cade Cunningham          -2.90919016 -4.4345298643
## Isaac Okoro               0.42993503 -0.2975331821
## Jaylen Brown             -0.86593793 -2.2952001302
## Jayson Tatum             -0.78010288 -2.6444271533
## Jarrett Culver           -1.51221150  0.0681535683
## Rui Hachimura             0.89990828  2.6695528608
## Brandon Ingram           -1.66287033 -1.8607679236
## Cam Reddish              -4.14985479  0.0842589880
## Markelle Fultz           -2.05054203 -6.2356705555
## Jerome Robinson          -3.05834301  0.2586574385
## T.J. Warren               1.08912829 -0.3669642309
## Ekpe Udoh                 2.87709283 -2.0867754797
## Zach LaVine              -1.60422247  2.6481909219
## Marcus Smart             -2.59113529 -1.9709358714
## Ziaire Williams          -1.78971475  0.2486334187
## Evan Mobley               4.26687599 -2.6519044433
## Marquese Chriss           2.77746275 -0.4521718014
## Paolo Banchero            0.71365922 -2.7656057765
fviz_nbclust(df_cluster, kmeans, method = 'wss')

fviz_nbclust(df_cluster, kmeans, method = 'silhouette')

fviz_nbclust(df_cluster, kmeans, method = 'gap_stat')

k <- 15
df_cbb.kmeans <- kmeans(df_cluster, centers = k, nstart = 50)
df_cbb.kmeans2 <- kmeans(df_cluster, centers = 10, nstart = 50)
df_cbb.kmeans3 <- kmeans(df_cluster, centers = 5, nstart = 50)

df_cbb.kmeans
## K-means clustering with 15 clusters of sizes 1, 18, 8, 7, 17, 8, 15, 14, 11, 13, 13, 9, 13, 8, 10
## 
## Cluster means:
##           PC1        PC2
## 1  -6.6673891 -7.9875515
## 2  -1.5229678 -1.9259630
## 3   0.5984250  2.9518339
## 4   1.0651259 -3.2120996
## 5  -1.6278245  0.6366335
## 6   3.3507603  3.4891410
## 7  -1.3010475  2.9669816
## 8  -3.6051291 -0.4968607
## 9  -3.2787799  1.7867642
## 10 -0.1965191  0.1407494
## 11  1.9428659 -0.7028714
## 12  5.0209873 -3.5834690
## 13  2.0566469  1.4352045
## 14 -2.6116359 -3.9220472
## 15  5.5023843 -0.2136538
## 
## Clustering vector:
##                 Mo Bamba             Jaxson Hayes          Harrison Barnes 
##                       15                       15                        5 
##          Thomas Robinson           Brandon Miller          Anthony Bennett 
##                        3                        2                       11 
##             Steven Adams             Nerlens Noel          Cameron Johnson 
##                        6                       15                        7 
##         Dennis Smith Jr.              Wes Johnson          Justise Winslow 
##                       14                       11                       10 
##           Jordan Hawkins                 Alex Len               Malik Monk 
##                        9                        6                        2 
##             Nik Stauskas           Derrick Favors              Tyler Herro 
##                        9                       15                        5 
##              CJ McCollum             Devin Booker              Evan Turner 
##                        8                        7                        5 
##            Julius Randle              Ben Simmons             Franz Wagner 
##                        4                       12                        7 
##               Kevin Knox             Aaron Gordon           Jonathan Isaac 
##                       10                       11                       13 
##               Trey Burke               Trae Young              Jalen Suggs 
##                        8                        1                        2 
##           Victor Oladipo            Jarace Walker              Joel Embiid 
##                        7                       13                       15 
##          James Bouknight       Bennedict Mathurin             Josh Jackson 
##                        5                        5                        4 
##          Lauri Markkanen            Jabari Parker              Noah Vonleh 
##                       10                        4                       11 
##            Miles Bridges             Zach Collins            Deandre Ayton 
##                       10                       13                       12 
##           Frank Kaminsky         Jabari Smith Jr.            Terrence Ross 
##                        3                        2                        7 
##          Otto Porter Jr.              Cody Zeller             De'Aaron Fox 
##                        3                       11                        2 
##        Jaren Jackson Jr.         D'Angelo Russell          Anthony Edwards 
##                       13                       14                        2 
##             Chris Duarte         DeMarcus Cousins          P.J. Washington 
##                        5                       12                       10 
##          Davion Mitchell             Bradley Beal            Elfrid Payton 
##                        9                       10                        2 
##         Derrick Williams       Wendell Carter Jr.      Willie Cauley-Stein 
##                       11                       11                        6 
##         Kendall Marshall        Patrick Patterson Kentavious Caldwell-Pope 
##                        9                       13                        5 
##  Michael Carter-Williams             Jamal Murray  Shai Gilgeous-Alexander 
##                        9                        2                        2 
##           Gordon Hayward        Marvin Bagley III             Dion Waiters 
##                       10                       12                        7 
##                Kris Dunn            Collin Sexton             Ben McLemore 
##                        8                       14                       10 
##           Doug McDermott             Myles Turner              Paul George 
##                        5                       13                        5 
##            Austin Rivers            Devin Vassell            Anthony Black 
##                        8                        7                        2 
##           Damian Lillard             Cole Aldrich             Kyrie Irving 
##                        8                        6                       14 
##             Xavier Henry            Cameron Payne         Dereck Lively II 
##                        5                        8                        6 
##           Taurean Prince           Kira Lewis Jr.            Anthony Davis 
##                        7                        8                       15 
##              Jeremy Lamb         Denzel Valentine   Michael Kidd-Gilchrist 
##                        7                        9                       11 
##           Meyers Leonard          De'Andre Hunter         Donovan Mitchell 
##                        6                        7                        9 
##         Taylor Hendricks               Jaden Ivey              Buddy Hield 
##                       11                        5                        9 
##              Bam Adebayo              Greg Monroe           Jalen Williams 
##                       15                       10                        5 
##           Onyeka Okongwu          Stanley Johnson             Jakob Poeltl 
##                       12                        2                       13 
##          Al-Farouq Aminu              Gradey Dick             Ochai Agbaji 
##                       11                        5                        7 
##              Jalen Duren           Andre Drummond              Jett Howard 
##                       15                       15                        9 
##        Tyrese Haliburton            Chet Holmgren                 Ed Davis 
##                        7                       15                        6 
##             Kelly Olynyk               Alec Burks            Mikal Bridges 
##                        3                        2                        7 
##           Andrew Wiggins          Markieff Morris               RJ Barrett 
##                        4                        3                        4 
##             Johnny Davis               Obi Toppin         Tristan Thompson 
##                        5                       13                       12 
##         Patrick Williams              John Henson            Keegan Murray 
##                       10                        6                       13 
##              Jalen Smith           Romeo Langford       Karl-Anthony Towns 
##                       13                        2                       13 
##               Trey Lyles             Kemba Walker             Luke Kennard 
##                       13                        8                        9 
##         Shabazz Muhammad            Marcus Morris          Zion Williamson 
##                        4                        3                       12 
##            Cason Wallace            Jahlil Okafor              Moses Moody 
##                        5                       12                        2 
##             Joshua Primo            Klay Thompson         Domantas Sabonis 
##                        7                        8                       13 
##               Coby White          Jimmer Fredette                John Wall 
##                        8                        8                       14 
##            Aaron Nesmith               Lonzo Ball            Jeremy Sochan 
##                        9                       10                        3 
##           Brandon Knight           Scottie Barnes                Ja Morant 
##                        8                       10                       14 
##          Cade Cunningham              Isaac Okoro             Jaylen Brown 
##                       14                       10                        2 
##             Jayson Tatum           Jarrett Culver            Rui Hachimura 
##                        2                        5                        3 
##           Brandon Ingram              Cam Reddish           Markelle Fultz 
##                        2                        8                       14 
##          Jerome Robinson              T.J. Warren                Ekpe Udoh 
##                        8                       11                       11 
##              Zach LaVine             Marcus Smart          Ziaire Williams 
##                        7                        2                        5 
##              Evan Mobley          Marquese Chriss           Paolo Banchero 
##                       12                       11                        4 
## 
## Within cluster sum of squares by cluster:
##  [1]  0.000000 12.092124  7.703117 11.267129  8.308983  9.318869  9.151351
##  [8] 10.282343  7.057748  8.272202 13.467131 21.895704  8.222699 11.493769
## [15] 14.072811
##  (between_SS / total_SS =  93.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
fviz_cluster(df_cbb.kmeans, df_cluster, labelsize = 4, pointsize = 1, show.clust.cent = FALSE, repel = TRUE,
             xlab = "Dimension 1", ylab = "Dimension 2", main = "Clustering with K-means") + scale_y_reverse()

cluster_assignments <- df_cbb.kmeans$cluster
cluster_df15 <- data.frame(value = cluster_assignments, name = names(cluster_assignments))

cluster_assignments2 <- df_cbb.kmeans2$cluster
cluster_df10 <- data.frame(value = cluster_assignments2, name = names(cluster_assignments2))

cluster_assignments3 <- df_cbb.kmeans3$cluster
cluster_df5 <- data.frame(value = cluster_assignments3, name = names(cluster_assignments3))

cluster_df15 <- as.tibble(cluster_df15)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
cluster_df10 <- as.tibble(cluster_df10)
cluster_df5 <- as.tibble(cluster_df5)

cluster_df15 <- cluster_df15 |> rename(pc_cluster = value) |> 
  relocate(pc_cluster, .after = name)
cluster_df10 <- cluster_df10 |> rename(pc_cluster = value) |> 
  relocate(pc_cluster, .after = name)
cluster_df5 <- cluster_df5 |> rename(pc_cluster = value) |> 
  relocate(pc_cluster, .after = name)

cluster_df15
## # A tibble: 165 × 2
##    name             pc_cluster
##    <chr>                 <int>
##  1 Mo Bamba                 15
##  2 Jaxson Hayes             15
##  3 Harrison Barnes           5
##  4 Thomas Robinson           3
##  5 Brandon Miller            2
##  6 Anthony Bennett          11
##  7 Steven Adams              6
##  8 Nerlens Noel             15
##  9 Cameron Johnson           7
## 10 Dennis Smith Jr.         14
## # ℹ 155 more rows
df_cbb.sample$group <- df_cbb.kmeans$cluster
df_cbb.sample <- df_cbb.sample |> relocate(group, .before = dunk_made)
head(df_cbb.sample)
##                 group dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Mo Bamba           15 2.1666667     2.2666667    0.956 3.633333     4.766667
## Jaxson Hayes       15 2.3125000     2.3437500    0.987 3.281250     3.843750
## Harrison Barnes     5 0.4533333     0.4533333    1.000 0.880000     1.200000
## Thomas Robinson     3 0.6666667     0.7904762    0.843 1.609524     2.495238
## Brandon Miller      2 0.6216216     0.7027027    0.885 2.297297     3.972973
## Anthony Bennett    11 1.5142857     1.6571429    0.914 2.857143     4.000000
##                 rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct
## Mo Bamba          0.762     0.532     0.7666667          2.500000        0.307
## Jaxson Hayes      0.854     0.829     0.5625000          1.437500        0.391
## Harrison Barnes   0.733     0.455     1.5066667          3.906667        0.386
## Thomas Robinson   0.645     0.627     0.7619048          2.180952        0.349
## Brandon Miller    0.578     0.259     0.8378378          2.513514        0.333
## Anthony Bennett   0.714     0.620     1.6000000          3.685714        0.434
##                 other2pt_asted fg2_pct fg3_per_g fg3a_per_g fg3_asted
## Mo Bamba                 0.217   0.603       0.5        1.7     0.857
## Jaxson Hayes             0.333   0.728       0.0        0.0     0.000
## Harrison Barnes          0.204   0.469       1.5        4.4     0.837
## Thomas Robinson          0.500   0.525       0.1        0.1     0.857
## Brandon Miller           0.032   0.483       2.9        7.5     0.830
## Anthony Bennett          0.768   0.587       1.0        2.7     0.972
##                 fg3_pct_per_g games ft_per_g fta_per_g ast_per_g_college
## Mo Bamba            0.2941176    30      2.7       4.0               0.5
## Jaxson Hayes        0.0000000    32      2.3       3.1               0.3
## Harrison Barnes     0.3409091    75      3.1       4.3               1.3
## Thomas Robinson     1.0000000   105      2.2       3.6               1.0
## Brandon Miller      0.3866667    37      3.9       4.6               2.1
## Anthony Bennett     0.3703704    35      3.5       5.1               1.0
##                 orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## Mo Bamba              3.2       7.3       0.8       3.7       1.5
## Jaxson Hayes          1.8       3.3       0.6       2.2       0.9
## Harrison Barnes       2.0       3.5       0.9       0.4       1.9
## Thomas Robinson       2.1       5.2       0.6       0.7       1.6
## Brandon Miller        2.1       6.2       0.9       0.9       2.2
## Anthony Bennett       2.5       5.7       0.7       1.2       1.9
##                 pts_per_g_college
## Mo Bamba                     12.9
## Jaxson Hayes                 10.0
## Harrison Barnes              16.3
## Thomas Robinson               9.8
## Brandon Miller               18.8
## Anthony Bennett              16.1
df_cbb.sample |> group_by(group) |>
  summarize(across(everything(), mean)) |> print(n = 15, width = Inf)
## # A tibble: 15 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     0             0        0        3.28          6.28   0.522
##  2     2     0.468         0.514    0.912    2.30          3.65   0.633
##  3     3     0.380         0.431    0.906    1.34          1.86   0.727
##  4     4     1.17          1.28     0.915    3.34          5.06   0.662
##  5     5     0.329         0.354    0.931    1.40          2.12   0.668
##  6     6     0.784         0.833    0.943    1.49          2.04   0.745
##  7     7     0.271         0.301    0.914    1.04          1.55   0.682
##  8     8     0.108         0.125    0.899    1.18          1.98   0.602
##  9     9     0.124         0.147    0.785    0.814         1.35   0.608
## 10    10     0.632         0.708    0.897    2.16          3.19   0.676
## 11    11     1.03          1.12     0.931    2.59          3.77   0.687
## 12    12     2.02          2.14     0.943    4.93          6.48   0.760
## 13    13     0.754         0.815    0.921    2.11          2.90   0.734
## 14    14     0.387         0.428    0.770    2.70          4.35   0.624
## 15    15     2.06          2.16     0.959    3.51          4.49   0.787
##    rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted fg2_pct
##        <dbl>         <dbl>             <dbl>        <dbl>          <dbl>   <dbl>
##  1     0.114         1.19               2.78        0.427          0.026   0.493
##  2     0.309         1.31               3.56        0.362          0.161   0.501
##  3     0.589         0.717              1.69        0.432          0.437   0.581
##  4     0.440         1.90               5.17        0.365          0.291   0.511
##  5     0.387         0.894              2.39        0.369          0.242   0.509
##  6     0.705         0.636              1.65        0.351          0.633   0.581
##  7     0.468         0.616              1.50        0.397          0.296   0.546
##  8     0.238         0.887              2.31        0.380          0.186   0.476
##  9     0.266         0.578              1.45        0.394          0.146   0.487
## 10     0.441         0.823              2.19        0.368          0.289   0.560
## 11     0.515         1.24               3.26        0.381          0.444   0.555
## 12     0.539         1.63               3.99        0.415          0.369   0.622
## 13     0.567         0.888              2.14        0.416          0.491   0.599
## 14     0.221         1.52               4.03        0.37           0.083   0.509
## 15     0.634         0.905              2.59        0.357          0.469   0.630
##    fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
##        <dbl>      <dbl>     <dbl>         <dbl> <dbl>    <dbl>     <dbl>
##  1    3.7        10.3       0.263        0.359   32       7.4       8.6 
##  2    1.52        4.29      0.731        0.342   41.6     4.07      5.38
##  3    0.45        1.31      0.935        0.414   95.5     2.31      3.35
##  4    1.1         3.21      0.891        0.331   36.3     4.13      5.86
##  5    1.55        4.28      0.725        0.359   61.7     2.88      3.68
##  6    0.0125      0.125     0.375        0.0312  72.1     1.62      2.7 
##  7    1.4         3.69      0.870        0.379   73.3     1.75      2.27
##  8    1.95        5.3       0.614        0.367   78.4     3.69      4.69
##  9    1.86        4.86      0.744        0.378   75.2     2.31      2.91
## 10    1.28        3.44      0.882        0.353   44       3         4.08
## 11    0.654       1.78      0.865        0.332   45.5     3.44      4.92
## 12    0.233       0.733     0.624        0.149   34.1     4.01      6.38
## 13    0.546       1.54      0.797        0.346   52.8     2.73      3.7 
## 14    1.75        4.56      0.479        0.379   33.1     4.84      6.25
## 15    0.19        0.58      0.364        0.0888  32.3     2.62      4.11
##    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
##                <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1              8.7      0.4        3.5      1.7       0.3        5.2 
##  2              2.94     1.33       4.29     1.38      0.633      2.47
##  3              1.32     1.79       4.08     0.812     0.712      1.39
##  4              2.2      2.43       5.14     1.03      0.786      2.44
##  5              2.25     1.19       4.08     1.26      0.482      2.04
##  6              0.8      2.28       4.39     0.538     2.09       1.26
##  7              1.76     1.06       2.99     1.06      0.493      1.35
##  8              3.81     0.693      3.34     1.41      0.364      2.7 
##  9              3.18     0.818      3.04     1.07      0.382      1.79
## 10              2.49     1.55       4.55     1.1       0.838      2.08
## 11              1.48     2.68       5.21     1.02      1.47       2.05
## 12              1.9      3.49       6.14     1.13      1.84       2.3 
## 13              1.17     2.14       4.99     0.677     1.63       1.54
## 14              5.4      0.95       4        1.52      0.55       3.32
## 15              1.05     2.73       5.84     0.98      2.97       1.75
##    pts_per_g_college
##                <dbl>
##  1             27.4 
##  2             16.6 
##  3             10.5 
##  4             17.9 
##  5             14.6 
##  6              8.34
##  7             11.1 
##  8             16.9 
##  9             12.6 
## 10             13.9 
## 11             14.7 
## 12             17.9 
## 13             12.5 
## 14             19.1 
## 15             12.0
df_cbb.sample |> group_by(group) |>
  summarize(across(everything(), median))
## # A tibble: 15 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     0             0        0        3.28          6.28   0.522
##  2     2     0.447         0.457    0.916    2.22          3.71   0.640
##  3     3     0.358         0.381    0.926    1.34          1.82   0.735
##  4     4     1.05          1.13     0.924    3.3           4.92   0.67 
##  5     5     0.298         0.344    0.938    1.44          2.27   0.667
##  6     6     0.721         0.783    0.954    1.34          1.78   0.741
##  7     7     0.302         0.32     0.902    1.08          1.52   0.692
##  8     8     0.108         0.108    0.966    1.04          1.70   0.594
##  9     9     0.136         0.167    0.833    0.766         1.35   0.617
## 10    10     0.541         0.641    0.9      2.37          3.33   0.657
## 11    11     0.85          0.9      0.938    2.59          3.69   0.699
## 12    12     1.94          2.08     0.951    4.82          6.64   0.762
## 13    13     0.766         0.812    0.93     2.12          2.89   0.722
## 14    14     0.345         0.405    0.91     2.66          4.33   0.628
## 15    15     2.24          2.31     0.958    3.63          4.56   0.776
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## #   fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## #   ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## #   blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
df_career_stats |> filter(player == "Jaylen Brown") |>
  select(player, pick_overall, year, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## # A tibble: 1 × 8
##   player       pick_overall  year pts_per_g trb_per_g ast_per_g  vorp     g
##   <chr>               <dbl> <dbl>     <dbl>     <dbl>     <dbl> <dbl> <dbl>
## 1 Jaylen Brown            3  2016      18.6       5.3       2.4   9.7   540
df_career_stats |> filter(year >= 2010)|> 
  group_by(pick_overall) |> 
  summarize(across(everything(), function(x) mean(x))) |>
  select(pick_overall, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## Warning: There were 122 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), function(x) mean(x))`.
## ℹ In group 1: `pick_overall = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 121 remaining warnings.
## # A tibble: 61 × 6
##    pick_overall pts_per_g trb_per_g ast_per_g  vorp     g
##           <dbl>     <dbl>     <dbl>     <dbl> <dbl> <dbl>
##  1            1     18.9       6.65      4.28 14.2   380.
##  2            2     14.7       5.14      3.19  3.91  338.
##  3            3     17.4       6.58      3.31 12.0   420.
##  4            4     12         5.32      1.91  3     372.
##  5            5     12.5       4.61      3.36  4.84  377.
##  6            6      9.92      4.5       2.17  6.57  346.
##  7            7     12.4       5.08      2.51  3.77  430.
##  8            8      9.21      3.31      1.91  1.71  390.
##  9            9     10.3       4.76      2.31  5.61  418.
## 10           10      9.74      3.46      2.13  5.14  370.
## # ℹ 51 more rows
df_cluster2 <- df_cbb_scaled

df_cluster2$name <- rownames(df_cbb)
df_cluster2 <- df_cluster2 |> relocate(name, .before = dunk_made)

df_cluster2 <- column_to_rownames(df_cluster2, var = "name")

k1 <- 15
k2 <- 10
k3 <- 5

k15 <- kmeans(df_cluster2, centers = k1, nstart = 50)
k10 <-  kmeans(df_cluster2, centers = k2, nstart = 50)
k5 <- kmeans(df_cluster2, centers = k3, nstart = 50)

temp_assign <- k15$cluster

combine <- function(df, kmeans) {
  temp_assign <- kmeans$cluster
  df_temp <- as.tibble(data.frame(value = temp_assign, name = names(temp_assign)))
  if(!("all_cluster" %in% colnames(df_temp))) {
    df_temp <- df_temp |> rename(all_cluster = value)
  }
  cluster_df <- left_join(df, df_temp, by = c("name"))
  cluster_df <- cluster_df |> relocate(pc_cluster, .after = all_cluster)
  # cluster_df <- cluster_df |> group_by(all_cluster, pc_cluster) |> summarize(n = n())
  # return(cluster_df)
  return(cluster_df)
}

groups15 <- combine(cluster_df15, k15) |> arrange(pc_cluster, all_cluster)
groups10 <- combine(cluster_df10, k10)
groups5 <- combine(cluster_df5, k5)

groups15
## # A tibble: 165 × 3
##    name                    all_cluster pc_cluster
##    <chr>                         <int>      <int>
##  1 Trae Young                        6          1
##  2 Elfrid Payton                     1          2
##  3 Anthony Black                     1          2
##  4 Jaylen Brown                      1          2
##  5 Marcus Smart                      1          2
##  6 De'Aaron Fox                      2          2
##  7 Shai Gilgeous-Alexander           2          2
##  8 Jalen Suggs                      10          2
##  9 Brandon Miller                   14          2
## 10 Malik Monk                       14          2
## # ℹ 155 more rows
df_cbb <- df_cbb |> rownames_to_column(var = "name")
df_groups <- left_join(df_cbb, groups15, by = c("name"))
df_cbb <- df_cbb |> column_to_rownames(var = "name")
library(ggforce)
df_groups <- df_groups |> 
  mutate(bust = if_else(name %in% bust_list, 1, 0),
         good = if_else(name %in% good_list, 1, 0))


df_groups |> group_by(all_cluster) |>
  summarize(n = n(),
            across(everything(), mean),
            ratio = good / bust) |> 
  select(-name) |>
  print(width = Inf)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `all_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 32
##    all_cluster     n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
##          <int> <dbl>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>
##  1           1    15    0.433         0.481     0.902     1.98         3.01
##  2           2     9    0.543         0.589     0.896     3.14         5.06
##  3           3     3    2.12          2.17      0.979     3.57         4.34
##  4           4    22    0.684         0.743     0.920     1.88         2.70
##  5           5     2    0             0         0         1.42         2.14
##  6           6     1    0             0         0         3.28         6.28
##  7           7     4    2.30          2.44      0.942     5.86         7.67
##  8           8    22    0.263         0.294     0.887     1.10         1.78
##  9           9    14    1.28          1.39      0.925     3.13         4.45
## 10          10     7    0.411         0.445     0.934     1.88         2.72
## 11          11    10    1.96          2.09      0.941     3.73         5.00
## 12          12    12    0.0783        0.0854    0.932     1.11         1.83
## 13          13    14    0.376         0.430     0.896     1.23         1.72
## 14          14    19    0.466         0.512     0.907     1.98         3.13
## 15          15    11    0.780         0.823     0.947     1.78         2.40
##    rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
##      <dbl>     <dbl>         <dbl>             <dbl>        <dbl>          <dbl>
##  1   0.663     0.314         0.574              1.92        0.290         0.176 
##  2   0.618     0.228         1.62               4.33        0.366         0.0983
##  3   0.825     0.556         0.964              2.55        0.391         0.281 
##  4   0.700     0.548         0.884              2.14        0.414         0.474 
##  5   0.664     0.062         0.788              1.82        0.438         0.024 
##  6   0.522     0.114         1.19               2.78        0.427         0.026 
##  7   0.763     0.529         1.58               3.84        0.428         0.402 
##  8   0.621     0.407         0.585              1.63        0.357         0.272 
##  9   0.707     0.497         1.65               4.31        0.382         0.409 
## 10   0.698     0.300         0.731              1.71        0.449         0.104 
## 11   0.745     0.621         1.06               2.90        0.366         0.456 
## 12   0.613     0.214         0.907              2.25        0.402         0.181 
## 13   0.719     0.538         0.733              1.69        0.438         0.313 
## 14   0.637     0.372         1.65               4.32        0.382         0.220 
## 15   0.750     0.681         0.685              1.80        0.357         0.606 
##    fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
##      <dbl>     <dbl>      <dbl>     <dbl>         <dbl> <dbl>    <dbl>     <dbl>
##  1   0.514    0.947       2.89      0.724        0.313   61.6     3.55      5.07
##  2   0.508    1.41        4         0.582        0.349   36.7     4.62      6.17
##  3   0.664    0.633       1.83      0.879        0.296   34       2.83      4.07
##  4   0.576    0.955       2.43      0.893        0.434   59.3     3.09      4.09
##  5   0.526    1.15        2.65      0.572        0.423   42       3.65      4.35
##  6   0.493    3.7        10.3       0.263        0.359   32       7.4       8.6 
##  7   0.648    0.425       1.28      0.905        0.252   33.5     4.47      6.75
##  8   0.496    1.83        4.94      0.780        0.369   65.6     2.28      2.87
##  9   0.550    0.629       1.8       0.940        0.308   38.9     3.47      5.12
## 10   0.584    1.53        4.03      0.717        0.370   41.9     1.86      2.57
## 11   0.610    0           0.02      0            0       36.7     2.9       4.79
## 12   0.488    1.98        5.3       0.602        0.371   83.5     3.82      4.78
## 13   0.570    0.857       2.33      0.924        0.356   82.9     2.04      2.77
## 14   0.489    1.82        4.98      0.777        0.359   40.2     3.83      4.91
## 15   0.589    0.0182      0.182     0.432        0.0341  64.6     1.97      3.15
##    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
##                <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1             3.27      1.51       4.41     1.6       0.753      2.68
##  2             5.32      1.02       4.12     1.46      0.522      3.28
##  3             1.23      2.67       7.6      1         4.03       1.47
##  4             1.31      2.06       5.18     0.764     1.31       1.69
##  5             6.15      0.35       2.5      1.3       0.3        2.55
##  6             8.7       0.4        3.5      1.7       0.3        5.2 
##  7             2.5       3.5        7.32     1.38      1.35       2.52
##  8             1.91      0.982      3.33     1.07      0.409      1.66
##  9             1.84      2.78       5.43     1.04      1.54       2.16
## 10             4.57      0.914      3.54     1.8       0.571      2.26
## 11             1.04      3.02       4.88     1         2.27       1.9 
## 12             3.62      0.7        3.3      1.35      0.35       2.48
## 13             1.32      1.39       3.24     0.871     0.643      1.34
## 14             2.2       1.45       4.28     1.06      0.6        2.18
## 15             0.927     2.28       4.5      0.564     1.95       1.4 
##    pts_per_g_college pc_cluster   bust  good ratio
##                <dbl>      <dbl>  <dbl> <dbl> <dbl>
##  1             13.9        6.4  0      0.333 Inf  
##  2             18.8       10.2  0      0.444 Inf  
##  3             13.7       15    0      0.667 Inf  
##  4             13.9       10.8  0.136  0.409   3  
##  5             12.4       11.5  0.5    0.5     1  
##  6             27.4        1    0      1     Inf  
##  7             20.7       12    0      0.5   Inf  
##  8             12.9        7.05 0.182  0.273   1.5
##  9             15.4        9.14 0.143  0.357   2.5
## 10             12.6        6.86 0      0.429 Inf  
## 11             13.1       13.7  0.1    0.5     5  
## 12             17.4        8.08 0.0833 0.5     6  
## 13             10.7        5.5  0      0.286 Inf  
## 14             17.1        4.05 0.158  0.316   2  
## 15              9.08       8.09 0      0.455 Inf
df_pc <- left_join(df_cluster |> rownames_to_column("name"), groups15, by = "name")
df_pc <- left_join(df_pc, df_groups |> select(name, bust, good), by = "name")

df_pc <- df_pc |> column_to_rownames("name")
df_pc <- df_pc |> 
  mutate(
    pc_cluster = as.factor(pc_cluster),
    all_cluster = as.factor(all_cluster)
  )

hulls <- df_pc |>
  group_by(all_cluster) |>
  slice(chull(PC1, PC2))

p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = all_cluster)) + 
  geom_polygon(data = hulls, 
               aes(group = all_cluster, color = all_cluster, fill = all_cluster), 
               alpha = 0.2) +
  geom_point() +
  geom_point(data = df_pc |> filter(bust == 1), size = 3, color = "black", shape = 10, show.legend = FALSE) + 
  geom_point(data = df_pc |> filter(good == 1), size = 3, color = "black", shape = 5, show.legend = FALSE) +
  scale_x_reverse() +
  labs(title = "Clusters using higher dimensional data",
       x = "Dimension 1",
       y = "Dimension 2",
       color = "Cluster",
       fill = "Cluster"
       )
  

p

p2 <- p + 
  geom_label_repel(data = df_pc |> rownames_to_column(var = "name") |> filter(bust == 1 | good == 1),
                   aes(label = name),
                   size = 1.6,
                   max.overlaps = 20,
                   fill = NA, 
                   label.size = NA,
                   segment.size = 0.2
                   )
p2

df_groups |> group_by(pc_cluster) |>
  summarize(n = n(),
            across(everything(), mean),
            ratio = good / bust) |> 
  select(-name) |>
  print(width = Inf)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `pc_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 32
##    pc_cluster     n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
##         <int> <dbl>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>
##  1          1     1     0             0        0        3.28          6.28
##  2          2    18     0.468         0.514    0.912    2.30          3.65
##  3          3     8     0.380         0.431    0.906    1.34          1.86
##  4          4     7     1.17          1.28     0.915    3.34          5.06
##  5          5    17     0.329         0.354    0.931    1.40          2.12
##  6          6     8     0.784         0.833    0.943    1.49          2.04
##  7          7    15     0.271         0.301    0.914    1.04          1.55
##  8          8    14     0.108         0.125    0.899    1.18          1.98
##  9          9    11     0.124         0.147    0.785    0.814         1.35
## 10         10    13     0.632         0.708    0.897    2.16          3.19
## 11         11    13     1.03          1.12     0.931    2.59          3.77
## 12         12     9     2.02          2.14     0.943    4.93          6.48
## 13         13    13     0.754         0.815    0.921    2.11          2.90
## 14         14     8     0.387         0.428    0.770    2.70          4.35
## 15         15    10     2.06          2.16     0.959    3.51          4.49
##    rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
##      <dbl>     <dbl>         <dbl>             <dbl>        <dbl>          <dbl>
##  1   0.522     0.114         1.19               2.78        0.427          0.026
##  2   0.633     0.309         1.31               3.56        0.362          0.161
##  3   0.727     0.589         0.717              1.69        0.432          0.437
##  4   0.662     0.440         1.90               5.17        0.365          0.291
##  5   0.668     0.387         0.894              2.39        0.369          0.242
##  6   0.745     0.705         0.636              1.65        0.351          0.633
##  7   0.682     0.468         0.616              1.50        0.397          0.296
##  8   0.602     0.238         0.887              2.31        0.380          0.186
##  9   0.608     0.266         0.578              1.45        0.394          0.146
## 10   0.676     0.441         0.823              2.19        0.368          0.289
## 11   0.687     0.515         1.24               3.26        0.381          0.444
## 12   0.760     0.539         1.63               3.99        0.415          0.369
## 13   0.734     0.567         0.888              2.14        0.416          0.491
## 14   0.624     0.221         1.52               4.03        0.37           0.083
## 15   0.787     0.634         0.905              2.59        0.357          0.469
##    fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
##      <dbl>     <dbl>      <dbl>     <dbl>         <dbl> <dbl>    <dbl>     <dbl>
##  1   0.493    3.7        10.3       0.263        0.359   32       7.4       8.6 
##  2   0.501    1.52        4.29      0.731        0.342   41.6     4.07      5.38
##  3   0.581    0.45        1.31      0.935        0.414   95.5     2.31      3.35
##  4   0.511    1.1         3.21      0.891        0.331   36.3     4.13      5.86
##  5   0.509    1.55        4.28      0.725        0.359   61.7     2.88      3.68
##  6   0.581    0.0125      0.125     0.375        0.0312  72.1     1.62      2.7 
##  7   0.546    1.4         3.69      0.870        0.379   73.3     1.75      2.27
##  8   0.476    1.95        5.3       0.614        0.367   78.4     3.69      4.69
##  9   0.487    1.86        4.86      0.744        0.378   75.2     2.31      2.91
## 10   0.560    1.28        3.44      0.882        0.353   44       3         4.08
## 11   0.555    0.654       1.78      0.865        0.332   45.5     3.44      4.92
## 12   0.622    0.233       0.733     0.624        0.149   34.1     4.01      6.38
## 13   0.599    0.546       1.54      0.797        0.346   52.8     2.73      3.7 
## 14   0.509    1.75        4.56      0.479        0.379   33.1     4.84      6.25
## 15   0.630    0.19        0.58      0.364        0.0888  32.3     2.62      4.11
##    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
##                <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1              8.7      0.4        3.5      1.7       0.3        5.2 
##  2              2.94     1.33       4.29     1.38      0.633      2.47
##  3              1.32     1.79       4.08     0.812     0.712      1.39
##  4              2.2      2.43       5.14     1.03      0.786      2.44
##  5              2.25     1.19       4.08     1.26      0.482      2.04
##  6              0.8      2.28       4.39     0.538     2.09       1.26
##  7              1.76     1.06       2.99     1.06      0.493      1.35
##  8              3.81     0.693      3.34     1.41      0.364      2.7 
##  9              3.18     0.818      3.04     1.07      0.382      1.79
## 10              2.49     1.55       4.55     1.1       0.838      2.08
## 11              1.48     2.68       5.21     1.02      1.47       2.05
## 12              1.9      3.49       6.14     1.13      1.84       2.3 
## 13              1.17     2.14       4.99     0.677     1.63       1.54
## 14              5.4      0.95       4        1.52      0.55       3.32
## 15              1.05     2.73       5.84     0.98      2.97       1.75
##    pts_per_g_college all_cluster   bust  good ratio
##                <dbl>       <dbl>  <dbl> <dbl> <dbl>
##  1             27.4         6    0      1       Inf
##  2             16.6         9.56 0.0556 0.444     8
##  3             10.5        10.8  0.125  0.25      2
##  4             17.9         9.43 0      0.143   Inf
##  5             14.6         7.41 0.235  0.235     1
##  6              8.34       15    0      0.375   Inf
##  7             11.1        10.5  0      0.333   Inf
##  8             16.9        11.1  0.0714 0.429     6
##  9             12.6         7.64 0.182  0.182     1
## 10             13.9         5.46 0.154  0.462     3
## 11             14.7         7.38 0.231  0.231     1
## 12             17.9         8.78 0.111  0.556     5
## 13             12.5         5.69 0      0.538   Inf
## 14             19.1         3.88 0      0.5     Inf
## 15             12.0         9    0      0.7     Inf
hulls <- df_pc |>
  group_by(pc_cluster) |>
  slice(chull(PC1, PC2))

p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = pc_cluster)) + 
  geom_polygon(data = hulls, 
               aes(group = pc_cluster, color = pc_cluster, fill = pc_cluster), 
               alpha = 0.2) +
  geom_point() +
  geom_point(data = df_pc |> filter(bust == 1), size = 3, color = "black", shape = 10, show.legend = FALSE) + 
  geom_point(data = df_pc |> filter(good == 1), size = 3, color = "black", shape = 5, show.legend = FALSE) +
  scale_y_reverse() +
  labs(title = "Clusters with Busts and Successes",
       x = "Dimension 1",
       y = "Dimension 2",
       color = "Cluster",
       fill = "Cluster"
  )

p

p2 <- p + 
  geom_label_repel(data = df_pc |> rownames_to_column(var = "name") |> filter(bust == 1 | good == 1),
                   aes(label = name),
                   size = 1.6,
                   max.overlaps = 20,
                   fill = NA, 
                   label.size = NA,
                   segment.size = 0.2
                   )

p2